For this exercise we will work with data from the World Development Indicators (WDI). Vincent Arel-Bundock provides a nice package for R that makes it easy to import the data.
# install.packages("WDI")
library(WDI)
library(tidyverse)
library(maps)
library(ggmap)
library(ggthemes)
library(scales)
library(ggpubr)
library(countrycode)
library(ggrepel)
library(showtext)
library(keyring)
Let’s get some data on measles vaccinations. SH.IMM.MEAS seems like a good fit. But feel free to use another variable you find interesting.
WDIsearch('measles')
## indicator
## [1,] "HF.IMM.MEAS"
## [2,] "HF.IMM.MEAS.Q1"
## [3,] "HF.IMM.MEAS.Q2"
## [4,] "HF.IMM.MEAS.Q3"
## [5,] "HF.IMM.MEAS.Q4"
## [6,] "HF.IMM.MEAS.Q5"
## [7,] "SH.IMM.MEAS"
## [8,] "SH.IMM.MEAS.Q1.ZS"
## [9,] "SH.IMM.MEAS.Q2.ZS"
## [10,] "SH.IMM.MEAS.Q3.ZS"
## [11,] "SH.IMM.MEAS.Q4.ZS"
## [12,] "SH.IMM.MEAS.Q5.ZS"
## name
## [1,] "Immunization, measles (% of children ages 15-23 months)"
## [2,] "Immunization, measles (% of children ages 15-23 months): Q1 (lowest)"
## [3,] "Immunization, measles (% of children ages 15-23 months): Q2"
## [4,] "Immunization, measles (% of children ages 15-23 months): Q3"
## [5,] "Immunization, measles (% of children ages 15-23 months): Q4"
## [6,] "Immunization, measles (% of children ages 15-23 months): Q5 (highest)"
## [7,] "Immunization, measles (% of children ages 12-23 months)"
## [8,] "Vaccinations (Measles) (% of children ages 12-23 months): Q1 (lowest)"
## [9,] "Vaccinations (Measles) (% of children ages 12-23 months): Q2"
## [10,] "Vaccinations (Measles) (% of children ages 12-23 months): Q3"
## [11,] "Vaccinations (Measles) (% of children ages 12-23 months): Q4"
## [12,] "Vaccinations (Measles) (% of children ages 12-23 months): Q5 (highest)"
# There will be no data for year 2020, so here I am just using 2019 as an example
df <- WDI(indicator = "SH.IMM.MEAS" ,
start = 2019, end = 2019, extra = F)
df <- df %>%
filter(!is.na(SH.IMM.MEAS))
world_map <- map_data("world")
world_map <- world_map %>%
rename(country = region) %>%
dplyr::select(-subregion)
head(world_map)
## long lat group order country
## 1 -69.89912 12.45200 1 1 Aruba
## 2 -69.89571 12.42300 1 2 Aruba
## 3 -69.94219 12.43853 1 3 Aruba
## 4 -70.00415 12.50049 1 4 Aruba
## 5 -70.06612 12.54697 1 5 Aruba
## 6 -70.05088 12.59707 1 6 Aruba
# Check unique country names in geospatial data frame
length(unique(world_map$country))
## [1] 252
# Check in measles dataset
length(unique(df$country))
## [1] 238
# looks like we have more in geospatial data frame, measles data set also have a lot of regions
country_names <- sort(unique(world_map$country))
# English to ISO
country_iso2c <- countrycode::countrycode(country_names, origin = "country.name", destination = "iso2c")
country_combined <- data.frame(
country = country_names,
iso2c = country_iso2c)
world_map2 <- left_join(world_map, country_combined, by = "country")
combined_df <- left_join(world_map2, df %>% select(-country), by = "iso2c")
labels <- seq(0, 100, by = 20)
labels <- paste(labels, "%", sep = "")
ggplot(data = combined_df)+
geom_polygon(aes(x = long, y = lat, group = group, fill = SH.IMM.MEAS), color = "#FFFFFF")+
scale_fill_distiller(name = "Vaccinations (% of Children ages 12-23 months)",
type = "seq", palette = "YlGnBu", direction = "horizontal",
breaks = seq(0, 100, by = 20),
limits = c(0, 100),
guide = "colourbar",
labels = labels)+
theme_map()+
labs(title = "Share of Infants vaccinated against measles, 2019")+
theme(legend.position = "bottom",
plot.title = element_text(size = 11, face = "bold", hjust = 0.5))+
guides(fill = guide_colorbar(barwidth = 20, barheight = 1)) # Adjust your colorbar
countrycode() and use the countrycode function to add a region indicator to the dataset. Create a world map faceted by your region indicator.Facet
This will not be the best way to show because facet_wrap could not change scales smoothly for each of the region. You may need to write your own functions to adjust scales for each of the map. Here I am using different map projections for differnt maps and then combine those plots together.
# Here I am using regions as defined in the United Nations
combined_df2 <- combined_df %>%
mutate(region = countrycode::countrycode(country, origin = "country.name", destination = "un.region.name"))
## Warning in countrycode::countrycode(country, origin = "country.name", destination = "un.region.name"): Some values were not matched unambiguously: Antarctica, Ascension Island, Azores, Barbuda, Bonaire, Canary Islands, Chagos Archipelago, Grenadines, Heard Island, Kosovo, Madeira Islands, Micronesia, Saba, Saint Martin, Siachen Glacier, Sint Eustatius, Taiwan, Virgin Islands
# Africa
africa <- ggplot(combined_df2 %>% filter(!is.na(region)))+
geom_polygon(aes(x = long, y = lat, group = group), color = "#FFFFFF")+
geom_polygon(data = combined_df2 %>% filter(!is.na(region) & region == "Africa"),
aes(x = long, y = lat, group = group, fill = SH.IMM.MEAS), color = "#FFFFFF")+
scale_fill_distiller(name = "Vaccinations (% of Children ages 12-23 months)",
type = "seq", palette = "GnBu", direction = "horizontal",
breaks = seq(0, 100, by = 20),
limits = c(0, 100),
guide = "colourbar",
labels = labels)+
theme_map()+
labs(title = "Africa")+
theme(legend.position = "bottom",
plot.title = element_text(size = 11, face = "bold", hjust = 0.5))+
guides(fill = guide_colorbar(barwidth = 20, barheight = 1))+
coord_map("orthographic", orientation = c(-5.5, 20, 0)) # orientation: latitude, longitude, rotation
# Asia
asia <- ggplot(combined_df2 %>% filter(!is.na(region)))+
geom_polygon(aes(x = long, y = lat, group = group), color = "#FFFFFF")+
geom_polygon(data = combined_df2 %>% filter(!is.na(region) & region == "Asia"),
aes(x = long, y = lat, group = group, fill = SH.IMM.MEAS), color = "#FFFFFF")+
scale_fill_distiller(name = "Vaccinations (% of Children ages 12-23 months)",
type = "seq", palette = "GnBu", direction = "horizontal",
breaks = seq(0, 100, by = 20),
limits = c(0, 100),
guide = "colourbar",
labels = labels)+
theme_map()+
labs(title = "Asia")+
theme(legend.position = "bottom",
plot.title = element_text(size = 11, face = "bold", hjust = 0.5))+
guides(fill = guide_colorbar(barwidth = 20, barheight = 1))+
coord_map("orthographic", orientation = c(30, 95, 0))
# America
america <- ggplot(combined_df2 %>% filter(!is.na(region)))+
geom_polygon(aes(x = long, y = lat, group = group), color = "#FFFFFF")+
geom_polygon(data = combined_df2 %>% filter(!is.na(region) & region == "Americas"),
aes(x = long, y = lat, group = group, fill = SH.IMM.MEAS), color = "#FFFFFF")+
scale_fill_distiller(name = "Vaccinations (% of Children ages 12-23 months)",
type = "seq", palette = "GnBu", direction = "horizontal",
breaks = seq(0, 100, by = 20),
limits = c(0, 100),
guide = "colourbar",
labels = labels)+
theme_map()+
labs(title = "Americas")+
theme(legend.position = "bottom",
plot.title = element_text(size = 11, face = "bold", hjust = 0.5))+
guides(fill = guide_colorbar(barwidth = 20, barheight = 1))+
coord_map("orthographic", orientation = c(15, -90, 5))
# Europe
europe <- ggplot(combined_df2 %>% filter(!is.na(region)))+
geom_polygon(aes(x = long, y = lat, group = group), color = "#FFFFFF")+
geom_polygon(data = combined_df2 %>% filter(!is.na(region) & region == "Europe"),
aes(x = long, y = lat, group = group, fill = SH.IMM.MEAS), color = "#FFFFFF")+
scale_fill_distiller(name = "Vaccinations (% of Children ages 12-23 months)",
type = "seq", palette = "GnBu", direction = "horizontal",
breaks = seq(0, 100, by = 20),
limits = c(0, 100),
guide = "colourbar",
labels = labels)+
theme_map()+
labs(title = "Europe")+
theme(legend.position = "bottom",
plot.title = element_text(size = 11, face = "bold", hjust = 0.5))+
guides(fill = guide_colorbar(barwidth = 20, barheight = 1))+
coord_map("orthographic", orientation = c(70, 50, 5))
# Oceania
oceania <- ggplot(combined_df2 %>% filter(!is.na(region)))+
geom_polygon(aes(x = long, y = lat, group = group), color = "#FFFFFF")+
geom_polygon(data = combined_df2 %>% filter(!is.na(region) & region == "Oceania"),
aes(x = long, y = lat, group = group, fill = SH.IMM.MEAS), color = "#FFFFFF")+
scale_fill_distiller(name = "Vaccinations (% of Children ages 12-23 months)",
type = "seq", palette = "GnBu", direction = "horizontal",
breaks = seq(0, 100, by = 20),
limits = c(0, 100),
guide = "colourbar",
labels = labels)+
theme_map()+
labs(title = "Oceania")+
theme(legend.position = "bottom",
plot.title = element_text(size = 11, face = "bold", hjust = 0.5))+
guides(fill = guide_colorbar(barwidth = 20, barheight = 1))+
coord_map("orthographic", orientation = c(-35, 135, 0))
ggpubr::ggarrange(africa, asia, america, europe, oceania, common.legend = TRUE, legend = "bottom", ncol = 5)
For this exercise, we want to create a map of where the Fortune 500 companies - that is the five hundred largest U.S. corporations by total revenue - have their headquarters.
Let’s get the addresses here: https://www.geolounge.com/fortune-500-list-by-state-for-2015/
library(XML)
library(RCurl)
fortune500_url <- getURL("https://www.geographyrealm.com/fortune-500-list-by-state-for-2015/",.opts = list(ssl.verifypeer = FALSE) ) # We needs this because the site is https
fortune500 = readHTMLTable(fortune500_url, header = TRUE, which = 1)
colnames(fortune500) <- tolower(colnames(fortune500))
fortune500 <- subset(fortune500, select=c("company","streetadd","place","state","zip"))
write.csv(fortune500, "fortune500.csv")
Load the list of Fortune 500 companies (in 2015).
fortune500 <- read_csv("fortune500.csv")[, -1]
Task: Aggregate the number of headquarters by state. Make a map in which the states are shaded by their number of Fortune 500 companies
Use dplyr() for the aggregation and the maps() package for a map of the U.S. with state boundaries.
companies_by_state <- fortune500 %>%
group_by(state) %>%
count() %>%
ungroup() %>%
arrange(desc(n))
us_states <- map_data("state")
us_states <- us_states %>%
dplyr::select(-subregion) %>%
rename(state = region)
# Change first letter of U.S state to uppercase
us_states$state <- str_to_title(us_states$state)
# Check if there is unmatched names
unique(companies_by_state$state) %in% unique(us_states$state)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE
# District of Columbia name does not match
unique(companies_by_state$state)[33]
## [1] "District of Columbia"
sort(unique(us_states$state)) %in% sort(unique(companies_by_state$state))
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE
## [25] FALSE TRUE TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE
## [49] FALSE
sort(unique(us_states$state))[c(8, 18, 23, 25, 28, 30, 33, 40, 44, 47, 49)]
## [1] "District Of Columbia" "Maine" "Mississippi"
## [4] "Montana" "New Hampshire" "New Mexico"
## [7] "North Dakota" "South Dakota" "Vermont"
## [10] "West Virginia" "Wyoming"
us_states <- us_states %>%
mutate(state = ifelse(state == "District Of Columbia",
"District of Columbia",
state))
# Merge dataset
us_states <- left_join(us_states, companies_by_state, by = "state")
ggplot(us_states)+
geom_polygon(aes(x = long, y = lat, group = group, fill = n), color = "#FFFFFF")+
scale_fill_gradient2(name = "Number of Fortune 500 Companies",
low = "#e5f5e0", mid = "#a1d99b", high = "#31a354",
na.value = "grey50",
breaks = seq(0, 60, 10),
limits = c(0, 60),
guide = "colourbar")+
theme_map()+
guides(fill = guide_colorbar(barwidth = 10, barheight = 1))+
labs(title = "Number of Fortune 500 Companies by State")+
theme(legend.position = "bottom",
plot.title = element_text(size = 10, face = "bold", hjust = 0.5))
There are 10 Continental U.S states that do not have Fortune 500 Companies.
Task: We also got some info on top corporate income tax rates by state. Import sheet 2 of the file State_Corporate_Income_Tax_Rates_2015.xlsx and make a map shaded by top corporate income tax rates.
library(readxl)
state_income_tax <- read_xlsx("State_Corporate_Income_Tax_Rates_2015.xlsx", sheet = 2)
head(state_income_tax, 5)
## # A tibble: 5 x 2
## state topcorpinctax
## <chr> <dbl>
## 1 Alabama 0.065
## 2 Alaska 0.094
## 3 Arizona 0.06
## 4 Arkansas 0.065
## 5 California 0.0884
# Here I am using another package, which could plot Alaska and Hawaii
library(usmap)
us_states2 <- usmap::us_map(regions = "states") %>%
rename(state = full)
# Check if there is unmatched states' names
sort(unique(state_income_tax$state)) %in% sort(unique(us_states2$state))
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE
sort(unique(us_states2$state)) %in% sort(unique(state_income_tax$state))
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE
sort(unique(state_income_tax$state))[8]
## [1] "D.C."
sort(unique(us_states2$state))[9]
## [1] "District of Columbia"
state_income_tax <- state_income_tax %>%
mutate(state = ifelse(state == "D.C.",
"District of Columbia",
state))
us_states2 <- left_join(us_states2, state_income_tax, by = "state")
ggplot(us_states2)+
geom_polygon(aes(x = x, y = y, group = group, fill = topcorpinctax), color = "#FFFFFF")+
scale_fill_gradient2(name = "Corporate Income Tax Rates by State",
low = "#fee0d2",
mid = "#fc9272",
high = "#de2d26",
limits = c(0, 0.12),
labels = scales::percent)+
labs(title = "Top Corporate Income Tax Rates by State")+
theme_map()+
coord_equal()+
guides(fill = guide_colorbar(barwidth = 12, barheight = 1))+
theme(legend.position = "bottom",
plot.title = element_text(size = 10, face = "bold", hjust = 0.5))
Is there evidence of companies being headquartered in low tax states?
sort(unique(companies_by_state$state)) %in% sort(unique(state_income_tax$state))
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Merge Dataset
mydata <- left_join(companies_by_state, state_income_tax, by = "state")
head(mydata)
## # A tibble: 6 x 3
## state n topcorpinctax
## <chr> <int> <dbl>
## 1 New York 55 0.071
## 2 Texas 54 0
## 3 California 53 0.0884
## 4 Illinois 34 0.0775
## 5 Ohio 23 0
## 6 Michigan 21 0.06
state_names <- tibble(state = state.name) %>%
bind_cols(tibble(abbr = state.abb)) %>%
bind_rows(tibble(state = "District of Columbia",
abbr = "DC"))
sort(mydata$state) %in% sort(state_names$state)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
mydata <- left_join(mydata, state_names, by = "state")
ggplot(data = mydata, aes(x = topcorpinctax, y = n))+
geom_point(color = "#c51b8a", alpha = 0.5)+
geom_text_repel(aes(label = abbr))+
geom_smooth(method = "loess", se = FALSE)+
scale_x_continuous(labels = scales::percent)+
scale_y_continuous(limits = c(0, 60),
breaks = seq(0, 60, 20))+
labs(title = "Number of Fortune 500 Companies vs. State Corporate Income Tax")+
theme_fivethirtyeight()+
theme(plot.title = element_text(size = 12, hjust = 0, family = "Montserrat"))
## `geom_smooth()` using formula 'y ~ x'
Task: 1. Make a scatter plot with a loess function estimating the relationship between corporate income tax rates (x-axis) and # of headquarters (y-axis). 2. What happens if we account for state population (i.e use HQs per capita)? Import the data on state populations from Wikipedia: https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_population. Use the function readHTMLTable() from the package XML to import the data and merge it on. Re-do the scatter plot from part 1.
library(XML)
library(RCurl)
##
## Attaching package: 'RCurl'
## The following object is masked from 'package:tidyr':
##
## complete
us_population_url <- getURL("https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_population")
# Third Table of Wikipedia page
state_population <- readHTMLTable(us_population_url, header = FALSE, which = 3, skip.rows = 1)
# Use only 2019 Population
state_population <- state_population %>%
rename(state = V1,
pop2019 = V3) %>%
filter(state %in% unique(mydata$state)) %>%
dplyr::select(state, pop2019)
head(state_population)
## state pop2019
## 1 Massachusetts 6,892,503
## 2 Connecticut 3,565,287
## 3 Rhode Island 1,059,361
## 4 New York 19,453,561
## 5 Pennsylvania 12,801,989
## 6 New Jersey 8,882,190
# Merge Data set again
mydata2 <- left_join(mydata, state_population, by = "state")
str(mydata2) # population is string
## tibble [39 × 5] (S3: tbl_df/tbl/data.frame)
## $ state : chr [1:39] "New York" "Texas" "California" "Illinois" ...
## $ n : int [1:39] 55 54 53 34 23 21 20 19 19 18 ...
## $ topcorpinctax: num [1:39] 0.071 0 0.0884 0.0775 0 0.06 0.09 0.06 0.06 0.0999 ...
## $ abbr : chr [1:39] "NY" "TX" "CA" "IL" ...
## $ pop2019 : chr [1:39] "19,453,561" "28,995,881" "39,512,223" "12,671,821" ...
mydata2$pop2019 <- gsub(pattern = "\\,", replacement = "", x = mydata2$pop2019)
mydata2$pop2019 <- as.numeric(mydata2$pop2019)
str(mydata2)
## tibble [39 × 5] (S3: tbl_df/tbl/data.frame)
## $ state : chr [1:39] "New York" "Texas" "California" "Illinois" ...
## $ n : int [1:39] 55 54 53 34 23 21 20 19 19 18 ...
## $ topcorpinctax: num [1:39] 0.071 0 0.0884 0.0775 0 0.06 0.09 0.06 0.06 0.0999 ...
## $ abbr : chr [1:39] "NY" "TX" "CA" "IL" ...
## $ pop2019 : num [1:39] 19453561 28995881 39512223 12671821 11689100 ...
# change population scales: millions
mydata2$pop2019 <- mydata2$pop2019/1000000
ggplot(mydata2, aes(x = topcorpinctax, y = n/pop2019))+
geom_point(aes(size = pop2019), color = "#c51b8a", alpha = 0.5)+
geom_text_repel(aes(label = abbr))+
scale_size_continuous(range = c(0.1, 8))+
scale_x_continuous(labels = scales::percent)+
scale_y_continuous("Number of Fortune 500 Companies per Million of People", limits = c(0, 5))+
geom_smooth(method = "loess", se = FALSE)+
ggtitle("Number of Fortune 500 Companies per Million of People vs. State Corporate Income Tax")+
guides(size = FALSE)+
theme_fivethirtyeight()+
theme(axis.title.y = element_text(size = 8, face = "bold"),
plot.title = element_text(size = 9, hjust = 0, family = "Montserrat"))
## `geom_smooth()` using formula 'y ~ x'
Now, let’s get the Latitude and Longitude coordinates of all these addresses of the HQ addresses. Fortunately, ggmap() does this for us nicely.
# This is Walmart's HQ address:
geocode("702 S.W. Eighth St. Bentonville Arkansas 72716", output = "latlon" , source = "google")
Task: 1. Geocode the locations of all headuarters in the sample. Add the locations of the headquarters as points to the map from part 2 (U.S. state map shaded by top corporate income tax rates).
I would say don’t try this. This could make your graph looks very terrible
fortune500_geocodes <- read_csv("fortune500_geocodes.csv")
# Load packages
library(leaflet)
library(htmltools)
library(rgdal)
library(leaflet.providers)
# Load shape files
states <- readOGR("cb_2016_us_state_500k/cb_2016_us_state_500k.shp")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/yifei/Documents/QMSS/data_visualization/data_viz_review/exercise_samples/05_maps/cb_2016_us_state_500k/cb_2016_us_state_500k.shp", layer: "cb_2016_us_state_500k"
## with 56 features
## It has 10 fields
## Integer64 fields read as strings: aland awater
# Make Sure 2 data frames states names are matching
sort(unique(states$name))
# Looks like state income tax states name are matching
is.element(state_income_tax$state, states$name)
is.element(states$name, state_income_tax$state)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE FALSE
## [49] FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
which(is.element(states$name, state_income_tax$state) == FALSE)
## [1] 43 44 45 48 49
states$name[c(43, 44, 45, 48, 49)]
## [1] "Guam"
## [2] "Commonwealth of the Northern Mariana Islands"
## [3] "United States Virgin Islands"
## [4] "American Samoa"
## [5] "Puerto Rico"
# Let's remove those regions from shape file
states <- subset(states, name %in% state_income_tax$state)
# Reorder data file to match our shape file
state_income_tax <- state_income_tax[order(match(state_income_tax$state, states$name)), ]
# Looks like they are matching now
head(state_income_tax$state)
## [1] "Alabama" "Virginia" "Washington" "Alaska" "Arizona"
## [6] "Arkansas"
head(states$name)
## [1] "Alabama" "Virginia" "Washington" "Alaska" "Arizona"
## [6] "Arkansas"
range(state_income_tax$topcorpinctax)
## [1] 0.00 0.12
bins <- seq(0.00, 0.12, by = 0.02)
bins
## [1] 0.00 0.02 0.04 0.06 0.08 0.10 0.12
pal <- colorBin("Greens", domain = state_income_tax$topcorpinctax, bins = bins)
fortune500_geocodes <- fortune500_geocodes %>%
mutate(rank = row_number(),
size = rev(row_number()))
fortune500_geocodes$label <- paste("Company:", fortune500_geocodes$company, "<br>",
"State:", fortune500_geocodes$state, "<br>",
"Rank:", fortune500_geocodes$rank)
leaflet() %>%
addProviderTiles(providers$Esri.WorldGrayCanvas) %>%
setView(lng = -96, lat = 37.8, zoom = 4) %>%
addPolygons(data = states,
weight = 1,
smoothFactor = 0.5,
color = "#FFFFFF",
fillColor = pal(state_income_tax$topcorpinctax),
fillOpacity = 0.8) %>%
addCircleMarkers(lng = fortune500_geocodes$lon,
lat = fortune500_geocodes$lat,
color = "#F0810F",
weight = 1,
radius = (fortune500_geocodes$size)^(0.5),
label = lapply(fortune500_geocodes$label, HTML),
labelOptions = list(textsize = "12px"))